#!/usr/bin/env python
#coding=utf-8
import re
import urllib2

# Chapter-index page that this script scrapes.
CHAPTER_INDEX_URL = 'http://book.zongheng.com/showchapter/45669.html'

# Matches an <a> element whose text looks like a chapter heading
# ("第...章..."), capturing the link target as ``href`` and the link text
# as ``ch``.  Quote characters are excluded from the ``href`` class so that
# the closing quote of a quoted attribute is not swallowed into the capture.
CHAPTER_LINK_RE = re.compile(
    u'<a[^<>]*href=[\'"]?(?P<href>[^\\s<>\'"]+)[\'"]?[^<>]*>'
    u'(?P<ch>第[^<>]+章[^<>]+)</a>',
    re.I | re.S)


def iter_chapter_links(html):
    """Yield ``(title, href)`` for every chapter link found in *html*.

    *html* is the already-decoded page text.  Only anchors whose text
    matches ``CHAPTER_LINK_RE`` are reported, in document order; an input
    with no such anchors yields nothing.
    """
    for match in CHAPTER_LINK_RE.finditer(html):
        yield match.group('ch'), match.group('href')


def main():
    """Download the chapter index and print one ``title href`` line per chapter."""
    response = urllib2.urlopen(CHAPTER_INDEX_URL)
    try:
        # The raw bytes are decoded as GBK before matching.
        html = response.read().decode('gbk')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
    print('loaded.')
    for title, href in iter_chapter_links(html):
        # A single formatted argument keeps the output identical under both
        # the Python 2 print statement and the Python 3 print function.
        print(u'%s %s' % (title, href))


if __name__ == '__main__':
    main()
